"""
# 对数变换实战 新闻流行度预测问题中输入和输出相关性可视化
"""

import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the Online News Popularity dataset. The columns used below are indexed
# with a leading space in their names (' n_tokens_content', ' shares').
news_df = pd.read_csv('../数据集/OnlineNewsPopularity.csv', delimiter=',', header=0)

sns.set_style('whitegrid')
fig, (ax_orig, ax_log) = plt.subplots(2, 1)

# Top panel: raw word count (input) against number of shares (target).
ax_orig.scatter(news_df[' n_tokens_content'], news_df[' shares'])
ax_orig.tick_params(labelsize=14)
ax_orig.set_xlabel('Number of Words in Article', fontsize=14)
# The y axis plots the ' shares' column, so label it as shares.
ax_orig.set_ylabel('Number of Shares', fontsize=14)

# Bottom panel: log10-transformed word count against number of shares.
# Adding 1 before the log keeps zero-word articles finite (log10(1) == 0).
news_df['log_n_tokens_content'] = np.log10(news_df[' n_tokens_content'] + 1)
ax_log.scatter(news_df['log_n_tokens_content'], news_df[' shares'])
ax_log.tick_params(labelsize=14)
ax_log.set_xlabel('Log of Number of Words in Article', fontsize=14)
ax_log.set_ylabel('Number of Shares', fontsize=14)

# Keep the top panel's x-label from overlapping the bottom panel.
fig.tight_layout()

# Make sure the output directory exists; savefig does not create it and would
# otherwise fail only after all the work above has been done.
output_path = './可视化/新闻流行度预测问题中输入和输出相关性可视化.png'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path)
plt.show()
